Load the student scores for the test - here we load the ETH Zurich test data, downloaded from https://pontifex.ethz.ch/s21t5/rate/
The scores are:
# Preview the loaded scores: one row per student, one column per item.
test_scores
## # A tibble: 9,671 x 44
## test_version year class A1_B1 A2_B0 A3_B2 A4_B3 A5_B4 A6_B5 A7_B6 A8_B7
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 pre 2017 s21t-000-~ 1 0 1 1 1 0 1 0
## 2 pre 2017 s21t-000-~ 1 0 1 1 1 1 0 1
## 3 pre 2017 s21t-000-~ 1 0 0 0 1 1 1 0
## 4 pre 2017 s21t-000-~ 1 0 1 1 1 1 1 1
## 5 pre 2017 s21t-000-~ 1 0 1 0 2 0 1 0
## 6 pre 2017 s21t-000-~ 0 1 0 0 1 2 0 2
## 7 pre 2017 s21t-000-~ 1 0 1 0 2 1 0 1
## 8 pre 2017 s21t-000-~ 1 1 1 1 2 1 1 2
## 9 pre 2017 s21t-000-~ 1 1 0 1 1 1 1 1
## 10 pre 2017 s21t-000-~ 1 0 1 0 0 1 0 0
## # ... with 9,661 more rows, and 33 more variables: A9_B8 <dbl>, A10_B9 <dbl>,
## # A11_B10 <dbl>, A12_B0 <dbl>, A13_B0 <dbl>, A14_B12 <dbl>, A15_B13 <dbl>,
## # A16_B14 <dbl>, A17_B15 <dbl>, A18_B16 <dbl>, A19_B0 <dbl>, A20_B17 <dbl>,
## # A21_B18 <dbl>, A22_B19 <dbl>, A23_B20 <dbl>, A24_B21 <dbl>, A25_B0 <dbl>,
## # A26_B22 <dbl>, A27_B23 <dbl>, A28_B24 <dbl>, A29_B25 <dbl>, A30_B0 <dbl>,
## # A31_B27 <dbl>, A32_B28 <dbl>, A33_B29 <dbl>, A34_B0 <dbl>, A35_B0 <dbl>,
## # A36_B0 <dbl>, A0_B11 <dbl>, A0_B26 <dbl>, A0_B30 <dbl>, A0_B31 <dbl>, ...
For this analysis, we replace the “2 = I don’t know” scores with NA, so that they will be treated as missing data.
# Recode the "2 = I don't know" responses as missing, then drop the handful
# of students (23 out of 9671) who answered "I don't know" to every single
# question, since all-NA rows cause problems for mirt.
test_scores <- test_scores %>%
  mutate(across(starts_with("A"), function(score) na_if(score, 2))) %>%
  filter(!if_all(starts_with("A"), is.na))
TODO - consider mapping this to 0 instead, as it reflects being “not correct”
TODO - also try fitting a 3PL model to see if the “don’t know” responses reduce the guessing parameter
The number of responses from each cohort:
# Count responses per cohort and display as a colour-shaded gt table.
# count(year) is equivalent to group_by(year) %>% tally().
test_scores %>%
  count(year) %>%
  gt() %>%
  data_color(
    columns = "n",
    colors = scales::col_numeric(palette = "Blues", domain = NULL)
  )
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
| year | n |
|---|---|
| 2017 | 1678 |
| 2018 | 1746 |
| 2019 | 1995 |
| 2020 | 2188 |
| 2021 | 2041 |
Mean and standard deviation for each item:
# Per-item summary statistics (completion rate, missing count, mean, SD),
# computed per cohort year and presented with one spanned column group per year.
test_scores %>%
select(-class, -test_version) %>%
group_by(year) %>%
# skimr emits one row per (item, year) with summary statistics
skim_without_charts() %>%
# drop the character-column stats, the percentile columns, and the type column
select(-contains("character."), -contains("numeric.p"), -skim_type) %>%
rename(complete = complete_rate) %>%
# make the table wider, i.e. with separate columns for each year's results, with the year at the start of each column name
pivot_wider(names_from = year, values_from = -c(skim_variable, year), names_glue = "{year}__{.value}") %>%
# put the columns in order by year
select(sort(names(.))) %>%
select(skim_variable, everything()) %>%
# use GT to make the table look nice
gt(rowname_col = "skim_variable") %>%
# group the columns from each year
tab_spanner_delim(delim = "__") %>%
fmt_number(columns = contains("numeric"), decimals = 2) %>%
fmt_percent(columns = contains("complete"), decimals = 0) %>%
# change all the numeric.mean and numeric.sd column names to Mean and SD
# (cols_label takes a named list: column name -> display label)
cols_label(
.list = test_scores %>% select(year) %>% distinct() %>% transmute(col = paste0(year, "__numeric.mean"), label = "Mean") %>% deframe()
) %>%
cols_label(
.list = test_scores %>% select(year) %>% distinct() %>% transmute(col = paste0(year, "__numeric.sd"), label = "SD") %>% deframe()
) %>%
# shade the mean columns so easy/hard items stand out at a glance
data_color(
columns = contains("numeric.mean"),
colors = scales::col_numeric(palette = c("Greens"), domain = NULL)
)
| 2017 | 2018 | 2019 | 2020 | 2021 | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| complete | n_missing | Mean | SD | complete | n_missing | Mean | SD | complete | n_missing | Mean | SD | complete | n_missing | Mean | SD | complete | n_missing | Mean | SD | |
| A1_B1 | 99% | 15 | 0.96 | 0.20 | 99% | 16 | 0.96 | 0.19 | 99% | 12 | 0.96 | 0.19 | 99% | 13 | 0.96 | 0.19 | 99% | 16 | 0.96 | 0.19 |
| A2_B0 | 92% | 131 | 0.33 | 0.47 | 94% | 97 | 0.30 | 0.46 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A3_B2 | 95% | 77 | 0.65 | 0.48 | 98% | 37 | 0.66 | 0.47 | 99% | 29 | 0.68 | 0.47 | 98% | 48 | 0.67 | 0.47 | 98% | 35 | 0.68 | 0.47 |
| A4_B3 | 98% | 27 | 0.65 | 0.48 | 99% | 23 | 0.63 | 0.48 | 98% | 35 | 0.65 | 0.48 | 99% | 31 | 0.63 | 0.48 | 98% | 42 | 0.62 | 0.49 |
| A5_B4 | 68% | 544 | 0.69 | 0.46 | 75% | 441 | 0.66 | 0.47 | 74% | 513 | 0.66 | 0.47 | 73% | 592 | 0.67 | 0.47 | 75% | 513 | 0.66 | 0.47 |
| A6_B5 | 78% | 362 | 0.90 | 0.30 | 85% | 269 | 0.87 | 0.34 | 85% | 296 | 0.88 | 0.33 | 86% | 299 | 0.89 | 0.32 | 85% | 297 | 0.87 | 0.33 |
| A7_B6 | 99% | 19 | 0.72 | 0.45 | 99% | 13 | 0.71 | 0.45 | 99% | 25 | 0.70 | 0.46 | 99% | 20 | 0.71 | 0.45 | 99% | 16 | 0.69 | 0.46 |
| A8_B7 | 81% | 311 | 0.58 | 0.49 | 89% | 199 | 0.56 | 0.50 | 90% | 195 | 0.58 | 0.49 | 87% | 283 | 0.69 | 0.46 | 87% | 261 | 0.67 | 0.47 |
| A9_B8 | 63% | 613 | 0.73 | 0.44 | 69% | 549 | 0.67 | 0.47 | 69% | 628 | 0.67 | 0.47 | 70% | 661 | 0.62 | 0.48 | 70% | 606 | 0.61 | 0.49 |
| A10_B9 | 73% | 452 | 0.74 | 0.44 | 79% | 367 | 0.69 | 0.46 | 78% | 443 | 0.71 | 0.45 | 78% | 481 | 0.71 | 0.46 | 77% | 474 | 0.69 | 0.46 |
| A11_B10 | 79% | 352 | 0.73 | 0.44 | 83% | 293 | 0.70 | 0.46 | 85% | 299 | 0.68 | 0.46 | 84% | 354 | 0.71 | 0.45 | 82% | 360 | 0.69 | 0.46 |
| A12_B0 | 87% | 216 | 0.78 | 0.41 | 93% | 127 | 0.77 | 0.42 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A13_B0 | 87% | 211 | 0.43 | 0.50 | 89% | 188 | 0.40 | 0.49 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A14_B12 | 86% | 237 | 0.65 | 0.48 | 89% | 192 | 0.65 | 0.48 | 89% | 229 | 0.66 | 0.47 | 88% | 253 | 0.61 | 0.49 | 87% | 260 | 0.62 | 0.49 |
| A15_B13 | 84% | 269 | 0.55 | 0.50 | 87% | 220 | 0.53 | 0.50 | 86% | 278 | 0.55 | 0.50 | 87% | 279 | 0.52 | 0.50 | 87% | 267 | 0.52 | 0.50 |
| A16_B14 | 75% | 427 | 0.25 | 0.43 | 80% | 341 | 0.25 | 0.43 | 81% | 372 | 0.25 | 0.43 | 81% | 410 | 0.24 | 0.43 | 80% | 404 | 0.25 | 0.43 |
| A17_B15 | 91% | 154 | 0.65 | 0.48 | 93% | 120 | 0.65 | 0.48 | 93% | 133 | 0.64 | 0.48 | 94% | 139 | 0.63 | 0.48 | 94% | 122 | 0.61 | 0.49 |
| A18_B16 | 72% | 468 | 0.53 | 0.50 | 79% | 372 | 0.46 | 0.50 | 76% | 487 | 0.37 | 0.48 | 78% | 492 | 0.35 | 0.48 | 78% | 449 | 0.36 | 0.48 |
| A19_B0 | 66% | 566 | 0.53 | 0.50 | 73% | 468 | 0.47 | 0.50 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A20_B17 | 94% | 108 | 0.78 | 0.42 | 95% | 85 | 0.76 | 0.43 | 95% | 101 | 0.78 | 0.42 | 95% | 100 | 0.74 | 0.44 | 95% | 105 | 0.75 | 0.43 |
| A21_B18 | 76% | 408 | 0.67 | 0.47 | 81% | 336 | 0.65 | 0.48 | 82% | 368 | 0.66 | 0.47 | 82% | 401 | 0.63 | 0.48 | 80% | 405 | 0.64 | 0.48 |
| A22_B19 | 72% | 477 | 0.72 | 0.45 | 76% | 420 | 0.68 | 0.47 | 77% | 453 | 0.67 | 0.47 | 78% | 481 | 0.66 | 0.47 | 77% | 479 | 0.65 | 0.48 |
| A23_B20 | 71% | 492 | 0.58 | 0.49 | 77% | 397 | 0.56 | 0.50 | 78% | 439 | 0.54 | 0.50 | 77% | 512 | 0.55 | 0.50 | 77% | 461 | 0.55 | 0.50 |
| A24_B21 | 81% | 326 | 0.73 | 0.44 | 85% | 260 | 0.68 | 0.47 | 87% | 268 | 0.69 | 0.46 | 86% | 313 | 0.66 | 0.47 | 84% | 325 | 0.68 | 0.47 |
| A25_B0 | 95% | 78 | 0.57 | 0.50 | 97% | 46 | 0.75 | 0.43 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A26_B22 | 85% | 250 | 0.94 | 0.24 | 87% | 230 | 0.93 | 0.26 | 87% | 261 | 0.93 | 0.26 | 87% | 276 | 0.93 | 0.26 | 87% | 259 | 0.92 | 0.27 |
| A27_B23 | 65% | 590 | 0.60 | 0.49 | 70% | 531 | 0.58 | 0.49 | 71% | 580 | 0.60 | 0.49 | 71% | 635 | 0.56 | 0.50 | 70% | 609 | 0.59 | 0.49 |
| A28_B24 | 61% | 647 | 0.69 | 0.46 | 70% | 529 | 0.65 | 0.48 | 70% | 600 | 0.65 | 0.48 | 68% | 710 | 0.62 | 0.49 | 68% | 646 | 0.60 | 0.49 |
| A29_B25 | 78% | 373 | 0.62 | 0.48 | 82% | 319 | 0.63 | 0.48 | 83% | 344 | 0.61 | 0.49 | 81% | 409 | 0.60 | 0.49 | 81% | 387 | 0.62 | 0.49 |
| A30_B0 | 91% | 145 | 0.83 | 0.38 | 93% | 121 | 0.79 | 0.41 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A31_B27 | 87% | 214 | 0.43 | 0.50 | 90% | 166 | 0.41 | 0.49 | 90% | 192 | 0.44 | 0.50 | 88% | 268 | 0.46 | 0.50 | 86% | 280 | 0.47 | 0.50 |
| A32_B28 | 72% | 466 | 0.32 | 0.46 | 75% | 436 | 0.27 | 0.45 | 78% | 446 | 0.26 | 0.44 | 76% | 529 | 0.27 | 0.45 | 76% | 485 | 0.27 | 0.44 |
| A33_B29 | 97% | 57 | 0.81 | 0.39 | 96% | 63 | 0.79 | 0.41 | 97% | 69 | 0.80 | 0.40 | 97% | 76 | 0.76 | 0.43 | 96% | 79 | 0.78 | 0.41 |
| A34_B0 | 70% | 497 | 0.83 | 0.38 | 76% | 425 | 0.81 | 0.39 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A35_B0 | 47% | 886 | 0.69 | 0.46 | 54% | 797 | 0.61 | 0.49 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A36_B0 | 51% | 830 | 0.44 | 0.50 | 58% | 741 | 0.40 | 0.49 | 0% | 1995 | NaN | NA | 0% | 2188 | NaN | NA | 0% | 2041 | NaN | NA |
| A0_B11 | 0% | 1678 | NaN | NA | 0% | 1746 | NaN | NA | 94% | 124 | 0.61 | 0.49 | 94% | 123 | 0.63 | 0.48 | 95% | 106 | 0.61 | 0.49 |
| A0_B26 | 0% | 1678 | NaN | NA | 0% | 1746 | NaN | NA | 88% | 240 | 0.61 | 0.49 | 87% | 288 | 0.62 | 0.49 | 86% | 290 | 0.61 | 0.49 |
| A0_B30 | 0% | 1678 | NaN | NA | 0% | 1746 | NaN | NA | 79% | 420 | 0.77 | 0.42 | 79% | 464 | 0.77 | 0.42 | 78% | 451 | 0.77 | 0.42 |
| A0_B31 | 0% | 1678 | NaN | NA | 0% | 1746 | NaN | NA | 69% | 618 | 0.50 | 0.50 | 69% | 676 | 0.47 | 0.50 | 75% | 519 | 0.60 | 0.49 |
| A0_B32 | 0% | 1678 | NaN | NA | 0% | 1746 | NaN | NA | 41% | 1179 | 0.49 | 0.50 | 42% | 1266 | 0.51 | 0.50 | 43% | 1154 | 0.49 | 0.50 |
Before applying IRT, we should check that the data satisfies the assumptions needed by the model. In particular, to use a 1-dimensional IRT model, we should have some evidence of unidimensionality in the test scores.
This plot shows the correlations between scores on each pair of items – note that it is restricted to only those items that appear on both versions of the test, since the plotting package did not deal well with missing data:
TODO - check with Mine that this is a valid way to proceed (throwing out questions with missing data)
# Keep only the per-item score columns, dropping the identifier columns.
item_scores <- test_scores %>%
  select(-class, -year, -test_version)

# Restrict to items present on both test versions: items removed after the
# pre test have the "_B0" suffix, newly added items have the "A0_" prefix.
item_scores_unchanged_only <- item_scores %>%
  select(!contains("B0") & !contains("A0"))

# Pairwise correlations with confidence intervals; the plot shows the upper
# and lower CI bounds in the two triangles of the correlation matrix.
cor_ci <- psych::corCi(item_scores_unchanged_only, plot = FALSE)
psych::cor.plot.upperLowerCi(cor_ci)
TODO - add some notes here to explain how to interpret all of this!
There are a few correlations that are not significantly different from 0:
# Item pairs whose correlation is not significantly different from zero,
# listed with the least significant pair first.
cor_ci$ci %>%
  as_tibble(rownames = "corr") %>%
  select(-contains(".e")) %>%
  filter(p > 0.05) %>%
  arrange(desc(p)) %>%
  gt() %>%
  fmt_number(columns = 2:4, decimals = 3)
| corr | lower | upper | p |
|---|---|---|---|
| A1_B1-A29_B | −0.018 | 0.030 | 0.610 |
| A29_B-A31_B | −0.037 | 0.010 | 0.260 |
| A24_B-A29_B | −0.007 | 0.038 | 0.176 |
Here we redo the correlation calculations with all the items, and check that there are still only a few cases where the correlations are close to 0:
# Repeat the near-zero correlation check, this time over every item
# (including the removed "_B0" and added "A0_" questions).
cor_ci2 <- psych::corCi(item_scores, plot = FALSE)
cor_ci2$ci %>%
  as_tibble(rownames = "corr") %>%
  select(-contains(".e")) %>%
  filter(p > 0.05) %>%
  arrange(desc(p)) %>%
  gt() %>%
  fmt_number(columns = 2:4, decimals = 3)
| corr | lower | upper | p |
|---|---|---|---|
| A1_B1-A29_B | −0.018 | 0.030 | 0.618 |
| A29_B-A31_B | −0.041 | 0.013 | 0.311 |
| A2_B0-A34_B | −0.019 | 0.063 | 0.297 |
| A29_B-A0_B30 | −0.010 | 0.042 | 0.234 |
| A1_B1-A2_B0 | −0.012 | 0.055 | 0.217 |
| A29_B-A0_B2 | −0.007 | 0.052 | 0.135 |
| A24_B-A29_B | −0.005 | 0.040 | 0.127 |
| A2_B0-A25_B | −0.008 | 0.065 | 0.126 |
| A29_B-A30_B | −0.006 | 0.077 | 0.097 |
| A12_B-A34_B | −0.006 | 0.089 | 0.084 |
| A16_B-A34_B | −0.005 | 0.084 | 0.081 |
| A1_B1-A12_B | −0.004 | 0.079 | 0.074 |
| A2_B0-A29_B | −0.003 | 0.081 | 0.067 |
| A2_B0-A6_B5 | −0.001 | 0.084 | 0.059 |
| A13_B-A29_B | −0.001 | 0.077 | 0.053 |
The overall picture is that the item scores are well correlated with each other.
Here we again focus on the subset of items that appeared in both tests.
# Check that the correlation structure supports factor analysis, then
# estimate the number of latent factors by aggregating many methods.
structure <- check_factorstructure(item_scores_unchanged_only)
n <- n_factors(item_scores_unchanged_only)
The choice of 1 dimension is supported by 6 (26.09%) methods out of 23 (Acceleration factor, R2, VSS complexity 1, Velicer’s MAP, TLI, RMSEA).
# Visualise method agreement, then tabulate how many methods support each
# candidate number of factors.
plot(n)
summary(n) %>% gt()
| n_Factors | n_Methods |
|---|---|
| 1 | 6 |
| 2 | 2 |
| 3 | 1 |
| 4 | 6 |
| 7 | 1 |
| 10 | 2 |
| 17 | 1 |
| 20 | 1 |
| 26 | 3 |
#n %>% tibble() %>% gt()
# Parallel analysis: compares observed eigenvalues against those from random
# data as another check on the number of factors to retain.
fa.parallel(item_scores_unchanged_only, fa = "fa")
## Parallel analysis suggests that the number of factors = 8 and the number of components = NA
# factanal() cannot handle missing values, so fill the NAs with 0
# (i.e. treat unanswered items as incorrect) before fitting.
item_scores_unchanged_only_and_no_na <- item_scores_unchanged_only %>%
  mutate(across(everything(), function(score) replace_na(score, 0)))

# Fit a single-factor model; show loadings above 0.3, sorted by loading.
fitfact <- factanal(item_scores_unchanged_only_and_no_na, factors = 1, rotation = "varimax")
print(fitfact, digits = 2, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = item_scores_unchanged_only_and_no_na, factors = 1, rotation = "varimax")
##
## Uniquenesses:
## A1_B1 A3_B2 A4_B3 A5_B4 A6_B5 A7_B6 A8_B7 A9_B8 A10_B9 A11_B10
## 0.96 0.77 0.80 0.72 0.77 0.78 0.78 0.62 0.65 0.69
## A14_B12 A15_B13 A16_B14 A17_B15 A18_B16 A20_B17 A21_B18 A22_B19 A23_B20 A24_B21
## 0.72 0.88 0.82 0.79 0.84 0.64 0.59 0.61 0.62 0.74
## A26_B22 A27_B23 A28_B24 A29_B25 A31_B27 A32_B28 A33_B29
## 0.70 0.64 0.64 0.87 0.77 0.80 0.88
##
## Loadings:
## [1] 0.52 0.62 0.59 0.56 0.53 0.60 0.64 0.63 0.61 0.51 0.55 0.60 0.60 0.48
## [16] 0.45 0.48 0.47 0.46 0.34 0.42 0.46 0.40 0.36 0.47 0.45 0.35
##
## Factor1
## SS loadings 6.90
## Proportion Var 0.26
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 4407.69 on 324 degrees of freedom.
## The p-value is 0
# Plot the single-factor loadings on a number line, labelling each point with
# its item name.
# Fix: the original built labels with paste0("A", rownames(load)), which uses
# the tibble's row numbers ("1".."27") and so labels points by row position
# (A1, A2, ...) rather than by item, mislabelling most items. tidy() already
# provides the item name in the `variable` column, so use that directly.
load <- tidy(fitfact)
ggplot(load, aes(x = fl1, y = 0)) +
  geom_point() +
  geom_label_repel(aes(label = variable), show.legend = FALSE) +
  labs(x = "Factor 1", y = NULL,
       title = "Standardised Loadings",
       subtitle = "Based upon correlation matrix") +
  theme_minimal()
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Table of factor loadings in descending order, joined with the short item
# descriptions from test_versions, shaded so high loadings stand out.
load %>%
  select(question = variable, factor_loading = fl1) %>%
  left_join(test_versions %>% select(question = label, description),
            by = "question") %>%
  arrange(desc(factor_loading)) %>%
  gt() %>%
  data_color(
    columns = contains("factor"),
    colors = scales::col_numeric(palette = "Greens", domain = NULL)
  )
| question | factor_loading | description |
|---|---|---|
| A21_B18 | 0.6435130 | (ln(sin))' |
| A22_B19 | 0.6254470 | hyp.functions |
| A9_B8 | 0.6168320 | trig.ineq. |
| A23_B20 | 0.6140785 | slope tangent |
| A27_B23 | 0.6002246 | int(exp) |
| A20_B17 | 0.5969677 | (exp)' |
| A28_B24 | 0.5963202 | Int = 0 |
| A10_B9 | 0.5879543 | trig.identity |
| A11_B10 | 0.5569525 | period |
| A26_B22 | 0.5471497 | int(poly) |
| A14_B12 | 0.5323281 | limit |
| A5_B4 | 0.5249438 | logs |
| A24_B21 | 0.5063266 | IVT |
| A6_B5 | 0.4835041 | logs |
| A3_B2 | 0.4808511 | arithmetic rules |
| A31_B27 | 0.4744211 | int(abs) |
| A7_B6 | 0.4734288 | graph translation |
| A8_B7 | 0.4638774 | sine pi/3 |
| A17_B15 | 0.4602994 | graph f' |
| A32_B28 | 0.4521843 | FtoC algebra |
| A4_B3 | 0.4460368 | Easy ineq. |
| A16_B14 | 0.4232210 | diff.quotient |
| A18_B16 | 0.3956478 | product rule |
| A29_B25 | 0.3565591 | int even funct |
| A33_B29 | 0.3468449 | difference vectors |
| A15_B13 | 0.3444530 | series |
| A1_B1 | 0.2081748 | linear function |
# To proceed with the IRT analysis, comment out the following line before knitting
#knitr::knit_exit()
Here we also investigate the 4-factor solution, to see whether these factors are interpretable.
# Fit the 4-factor solution for comparison; print loadings above 0.3, sorted.
fitfact4 <- factanal(item_scores_unchanged_only_and_no_na, factors = 4, rotation = "varimax")
print(fitfact4, digits = 2, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = item_scores_unchanged_only_and_no_na, factors = 4, rotation = "varimax")
##
## Uniquenesses:
## A1_B1 A3_B2 A4_B3 A5_B4 A6_B5 A7_B6 A8_B7 A9_B8 A10_B9 A11_B10
## 0.91 0.73 0.74 0.69 0.74 0.72 0.77 0.55 0.65 0.67
## A14_B12 A15_B13 A16_B14 A17_B15 A18_B16 A20_B17 A21_B18 A22_B19 A23_B20 A24_B21
## 0.69 0.85 0.72 0.72 0.83 0.51 0.53 0.58 0.60 0.65
## A26_B22 A27_B23 A28_B24 A29_B25 A31_B27 A32_B28 A33_B29
## 0.59 0.61 0.61 0.84 0.74 0.77 0.83
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## A20_B17 0.61
## A21_B18 0.52 0.37
## A26_B22 0.53 0.32
## A1_B1
## A3_B2 0.34 0.33
## A4_B3 0.39
## A5_B4 0.42
## A6_B5 0.32
## A7_B6 0.42
## A8_B7 0.34
## A9_B8 0.34 0.30 0.46
## A10_B9 0.39
## A11_B10 0.34 0.30
## A14_B12 0.40
## A15_B13
## A16_B14 0.47
## A17_B15 0.42
## A18_B16
## A22_B19 0.47 0.32
## A23_B20 0.34 0.39
## A24_B21 0.43 0.35
## A27_B23 0.44 0.32
## A28_B24 0.42
## A29_B25 0.34
## A31_B27 0.31
## A32_B28 0.33
## A33_B29 0.36
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 2.36 2.23 2.11 1.45
## Proportion Var 0.09 0.08 0.08 0.05
## Cumulative Var 0.09 0.17 0.25 0.30
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 1137.31 on 249 degrees of freedom.
## The p-value is 1.72e-113
# Scatter the factor-1 vs factor-2 loadings from the 4-factor fit.
# Fixes two label bugs in the original: (1) the labels referenced `load`
# (the 1-factor result) instead of `load4`; (2) they were built from row
# numbers via paste0("A", rownames(...)) rather than from the item names.
# The `variable` column from tidy() gives the correct item name.
load4 <- tidy(fitfact4)
ggplot(load4, aes(x = fl1, y = fl2)) +
  geom_point() +
  geom_label_repel(aes(label = variable), show.legend = FALSE) +
  labs(x = "Factor 1", y = "Factor 2",
       title = "Standardised Loadings",
       subtitle = "Based upon correlation matrix") +
  theme_minimal()
# For each item, identify the factor with the largest absolute loading.
main_factors <- load4 %>%
  # mutate(factorNone = 0.4) %>% # add this to set the main factor to "None" where all loadings are below 0.4
  pivot_longer(cols = contains("fl"), names_to = "factor") %>%
  mutate(value_abs = abs(value)) %>%
  group_by(variable) %>%
  # slice_max() supersedes top_n(); like top_n() it keeps ties by default
  slice_max(value_abs, n = 1) %>%
  ungroup() %>%
  transmute(main_factor = factor, variable)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Table of 4-factor loadings grouped by each item's main factor, with
# loadings above 0.4 shown in bold.
load4 %>%
  select(-uniqueness) %>%
  # add the info about which is the main factor; explicit join keys silence
  # the "Joining, by = ..." messages and guard against accidental key changes
  left_join(main_factors, by = "variable") %>%
  left_join(test_versions %>% select(variable = label, description),
            by = "variable") %>%
  arrange(main_factor) %>%
  select(main_factor, everything()) %>%
  # within each main factor, order items by their largest absolute loading
  rowwise() %>%
  mutate(max_loading = max(abs(c_across(starts_with("fl"))))) %>%
  group_by(main_factor) %>%
  arrange(-max_loading, .by_group = TRUE) %>%
  ungroup() %>%
  select(-max_loading) %>%
  # sort out the presentation
  rename("Main Factor" = main_factor, # the _ throws a latex error
         "Question" = variable) %>%
  # embolden loadings above 0.4; across() supersedes mutate_at()
  mutate(across(
    starts_with("fl"),
    ~ cell_spec(round(., digits = 3), bold = abs(.) > 0.4)
  )) %>%
  kable(booktabs = TRUE, escape = FALSE, longtable = TRUE) %>%
  kableExtra::collapse_rows(columns = 1, valign = "top") %>%
  kableExtra::kable_styling(latex_options = c("repeat_header"))
## Joining, by = "variable"
## Joining, by = "variable"
| Main Factor | Question | fl1 | fl2 | fl3 | fl4 | description |
|---|---|---|---|---|---|---|
| fl1 | A20_B17 | 0.609 | 0.184 | 0.267 | 0.105 | (exp)’ |
| A26_B22 | 0.533 | 0.113 | 0.321 | 0.104 | int(poly) | |
| A21_B18 | 0.52 | 0.366 | 0.184 | 0.185 | (ln(sin))’ | |
| A22_B19 | 0.465 | 0.319 | 0.186 | 0.263 | hyp.functions | |
| A27_B23 | 0.442 | 0.324 | 0.162 | 0.254 | int(exp) | |
| A29_B25 | 0.344 | 0.151 | 0.086 | 0.107 | int even funct | |
| fl2 | A16_B14 | 0.173 | 0.474 | 0.036 | 0.148 | diff.quotient |
| A5_B4 | 0.215 | 0.419 | 0.269 | 0.125 | logs | |
| A14_B12 | 0.286 | 0.4 | 0.244 | 0.105 | limit | |
| A10_B9 | 0.274 | 0.392 | 0.256 | 0.246 | trig.identity | |
| A8_B7 | 0.18 | 0.344 | 0.204 | 0.194 | sine pi/3 | |
| A3_B2 | 0.206 | 0.34 | 0.331 | 0.062 | arithmetic rules | |
| A32_B28 | 0.224 | 0.334 | 0.091 | 0.255 | FtoC algebra | |
| A18_B16 | 0.221 | 0.276 | 0.125 | 0.156 | product rule | |
| fl3 | A24_B21 | 0.16 | 0.124 | 0.43 | 0.35 | IVT |
| A17_B15 | 0.157 | 0.121 | 0.423 | 0.251 | graph f’ | |
| A7_B6 | 0.206 | 0.139 | 0.422 | 0.193 | graph translation | |
| A4_B3 | 0.087 | 0.279 | 0.395 | 0.141 | Easy ineq. | |
| A33_B29 | 0.119 | 0.096 | 0.364 | 0.126 | difference vectors | |
| A11_B10 | 0.222 | 0.268 | 0.336 | 0.304 | period | |
| A6_B5 | 0.268 | 0.279 | 0.319 | 0.075 | logs | |
| A1_B1 | 0.097 | 0.032 | 0.278 | 0.005 | linear function | |
| A15_B13 | 0.064 | 0.233 | 0.266 | 0.133 | series | |
| fl4 | A9_B8 | 0.179 | 0.337 | 0.305 | 0.463 | trig.ineq. |
| A28_B24 | 0.297 | 0.27 | 0.235 | 0.419 | Int = 0 | |
| A23_B20 | 0.339 | 0.29 | 0.226 | 0.389 | slope tangent | |
| A31_B27 | 0.128 | 0.276 | 0.262 | 0.308 | int(abs) |
We can fit a Multidimensional Item Response Theory (mirt) model. From the function definition:
mirt fits a maximum likelihood (or maximum a posteriori) factor analysis model to any mixture of dichotomous and polytomous data under the item response theory paradigm, using either Cai's (2010) Metropolis-Hastings Robbins-Monro (MHRM) algorithm or an EM algorithm.
The process is to first fit the model, and save the result as a model object that we can then parse to get tabular or visual displays of the model that we might want. When fitting the model, we have the option to specify a few arguments, which then get interpreted as parameters to be passed to the model.
# Fit a unidimensional 2PL IRT model to all item scores. Partially-missing
# responses are passed through as NA (only all-NA rows were removed above).
fit_2pl <- mirt(
data = item_scores, # just the columns with question scores
#removeEmptyRows = TRUE,
model = 1, # number of factors to extract
itemtype = "2PL", # 2 parameter logistic model
SE = TRUE # estimate standard errors
)
##
Iteration: 1, Log-Lik: -153303.326, Max-Change: 3.43983
Iteration: 2, Log-Lik: -144537.585, Max-Change: 2.83797
Iteration: 3, Log-Lik: -143119.698, Max-Change: 0.56850
Iteration: 4, Log-Lik: -142603.679, Max-Change: 0.32841
Iteration: 5, Log-Lik: -142319.878, Max-Change: 0.19091
Iteration: 6, Log-Lik: -142121.621, Max-Change: 0.24745
Iteration: 7, Log-Lik: -141985.177, Max-Change: 0.14796
Iteration: 8, Log-Lik: -141875.725, Max-Change: 0.18103
Iteration: 9, Log-Lik: -141796.485, Max-Change: 0.10819
Iteration: 10, Log-Lik: -141728.929, Max-Change: 0.14340
Iteration: 11, Log-Lik: -141681.982, Max-Change: 0.08343
Iteration: 12, Log-Lik: -141639.306, Max-Change: 0.10933
Iteration: 13, Log-Lik: -141610.936, Max-Change: 0.04694
Iteration: 14, Log-Lik: -141583.493, Max-Change: 0.07738
Iteration: 15, Log-Lik: -141566.377, Max-Change: 0.04283
Iteration: 16, Log-Lik: -141550.332, Max-Change: 0.06029
Iteration: 17, Log-Lik: -141539.561, Max-Change: 0.03012
Iteration: 18, Log-Lik: -141529.112, Max-Change: 0.04121
Iteration: 19, Log-Lik: -141522.131, Max-Change: 0.02231
Iteration: 20, Log-Lik: -141515.911, Max-Change: 0.03346
Iteration: 21, Log-Lik: -141511.487, Max-Change: 0.01938
Iteration: 22, Log-Lik: -141507.693, Max-Change: 0.02355
Iteration: 23, Log-Lik: -141504.506, Max-Change: 0.01518
Iteration: 24, Log-Lik: -141501.657, Max-Change: 0.01769
Iteration: 25, Log-Lik: -141499.666, Max-Change: 0.01220
Iteration: 26, Log-Lik: -141497.920, Max-Change: 0.00819
Iteration: 27, Log-Lik: -141496.452, Max-Change: 0.00703
Iteration: 28, Log-Lik: -141493.651, Max-Change: 0.00556
Iteration: 29, Log-Lik: -141493.279, Max-Change: 0.00409
Iteration: 30, Log-Lik: -141493.000, Max-Change: 0.00348
Iteration: 31, Log-Lik: -141492.226, Max-Change: 0.00217
Iteration: 32, Log-Lik: -141492.197, Max-Change: 0.00094
Iteration: 33, Log-Lik: -141492.187, Max-Change: 0.00075
Iteration: 34, Log-Lik: -141492.166, Max-Change: 0.00060
Iteration: 35, Log-Lik: -141492.160, Max-Change: 0.00046
Iteration: 36, Log-Lik: -141492.156, Max-Change: 0.00029
Iteration: 37, Log-Lik: -141492.155, Max-Change: 0.00022
Iteration: 38, Log-Lik: -141492.153, Max-Change: 0.00016
Iteration: 39, Log-Lik: -141492.152, Max-Change: 0.00014
Iteration: 40, Log-Lik: -141492.147, Max-Change: 0.00057
Iteration: 41, Log-Lik: -141492.145, Max-Change: 0.00014
Iteration: 42, Log-Lik: -141492.145, Max-Change: 0.00045
Iteration: 43, Log-Lik: -141492.143, Max-Change: 0.00039
Iteration: 44, Log-Lik: -141492.143, Max-Change: 0.00021
Iteration: 45, Log-Lik: -141492.143, Max-Change: 0.00010
Iteration: 46, Log-Lik: -141492.142, Max-Change: 0.00026
Iteration: 47, Log-Lik: -141492.142, Max-Change: 0.00022
Iteration: 48, Log-Lik: -141492.142, Max-Change: 0.00010
Iteration: 49, Log-Lik: -141492.142, Max-Change: 0.00009
##
## Calculating information matrix...
We compute Yen’s \(Q_3\) (1984) to check for any dependence between items after controlling for \(\theta\). This gives a score for each pair of items, with scores above 0.2 regarded as problematic (see DeMars, p. 48).
# Plot the Q3 statistics as a correlation matrix (upper triangle only).
# NOTE(review): residuals_2pl is not computed anywhere in the visible code —
# presumably it comes from residuals(fit_2pl, type = "Q3") in an elided
# chunk; confirm before re-running.
residuals_2pl %>% as.matrix() %>%
corrplot::corrplot(type = "upper")
This shows that most item pairs are independent, with only one pair showing cause for concern:
# List the item pairs whose Q3 statistic exceeds the 0.2 threshold,
# keeping each pair once (smaller question number listed as q1).
residuals_2pl %>%
  as_tibble(rownames = "q1") %>%
  pivot_longer(cols = starts_with("A"), names_to = "q2", values_to = "Q3_score") %>%
  filter(parse_number(q1) < parse_number(q2)) %>%
  filter(abs(Q3_score) > 0.2) %>%
  gt()
| q1 | q2 | Q3_score |
|---|---|---|
| A18_B16 | A19_B0 | 0.6718485 |
Items A18 and A19 are on the product and quotient rules.
Given that this violation of the local independence assumption is very mild (particularly as A19 is one of the questions removed from the test), we proceed using this model.
We then compute factor score estimates and augment the existing data frame with these estimates, to keep everything in one place. To do the estimation, we use the fscores() function from the mirt package which takes in a computed model object and computes factor score estimates according to the method specified. We will use the EAP method for factor score estimation, which is the “expected a-posteriori” method, the default. We specify it explicitly below, but the results would have been the same if we omitted specifying the method argument since it’s the default method the function uses.
# Augment the data with each student's EAP factor score estimate.
# fscores() returns an n x 1 matrix; flatten it to a plain numeric vector so
# that F1 behaves like an ordinary column downstream (the original stored a
# matrix column, which works in ggplot but surprises other consumers).
test_scores <- test_scores %>%
  mutate(F1 = as.vector(fscores(fit_2pl, method = "EAP")))
We can also calculate the model coefficient estimates using a generic function coef() which is used to extract model coefficients from objects returned by modeling functions. We will set the IRTpars argument to TRUE, which means slope intercept parameters will be converted into traditional IRT parameters.
# Extract item parameters, converted to traditional IRT form (a, b, g, u).
coefs_2pl <- coef(fit_2pl, IRTpars = TRUE)
The resulting object coefs_2pl is a list, with one element for each question, and an additional GroupPars element that we won’t be using. The output is a bit long, so we’re only showing a few of the elements here:
# Show the first three items' parameter estimates with 95% CIs.
coefs_2pl[1:3]
## $A1_B1
## a b g u
## par 1.1074475 -3.394517 0 1
## CI_2.5 0.9624536 -3.743726 NA NA
## CI_97.5 1.2524414 -3.045309 NA NA
##
## $A2_B0
## a b g u
## par 0.6008349 1.446187 0 1
## CI_2.5 0.5104951 1.222858 NA NA
## CI_97.5 0.6911746 1.669515 NA NA
##
## $A3_B2
## a b g u
## par 1.313206 -0.7087592 0 1
## CI_2.5 1.237460 -0.7577961 NA NA
## CI_97.5 1.388952 -0.6597223 NA NA
# coefs_2pl[35:37]  # uncomment to inspect items near the end of the list
Let’s take a closer look at the first element:
# Single-bracket indexing keeps the list wrapper: a one-element named list.
coefs_2pl[1]
## $A1_B1
## a b g u
## par 1.1074475 -3.394517 0 1
## CI_2.5 0.9624536 -3.743726 NA NA
## CI_97.5 1.2524414 -3.045309 NA NA
In this output:
Here, a is the discrimination parameter and b is the difficulty parameter. To make this output a little more user friendly, we should tidy it such that we have a row per question. We’ll do this in two steps. First, write a function that tidies the output for one question, i.e. one list element. Then, map this function over the list of all questions, resulting in a data frame.
# Tidy one element of the mirt coef() list (a small matrix of estimates and
# CI bounds for a single question) into a one-row tibble with columns like
# a_est, a_CI_2.5, b_est, b_CI_97.5.
tidy_mirt_coefs <- function(x){
  x %>%
    # flatten the per-question coefficient matrix into long (row, col, value) form
    melt() %>%
    as_tibble() %>%
    # melt() yields factor columns; work with plain character vectors
    mutate(across(where(is.factor), as.character)) %>%
    # keep only the discrimination (a) and difficulty (b) parameters
    filter(X2 %in% c("a", "b")) %>%
    # relabel the point-estimate row from "par" to "est"; other rows unchanged
    mutate(X1 = recode(X1, par = "est")) %>%
    # build names like "a_est" / "b_CI_2.5" and spread to one row per question
    unite(X2, X1, col = "var", sep = "_") %>%
    pivot_wider(names_from = var, values_from = value)
}
Let’s see what this does to a single element in coefs_2pl:
# Apply the tidier to the first question's coefficients.
tidy_mirt_coefs(coefs_2pl[1])
## # A tibble: 1 x 7
## L1 a_est a_CI_2.5 a_CI_97.5 b_est b_CI_2.5 b_CI_97.5
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1_B1 1.11 0.962 1.25 -3.39 -3.74 -3.05
And now let’s map it over all elements of coefs:
# use head(., -1) to remove the last element, `GroupPars`, which does not correspond to a question
# Tidy every question's coefficients into one data frame (one row per item),
# then attach the question metadata (description, outcome) from test_versions.
tidy_2pl <- map_dfr(head(coefs_2pl, -1), tidy_mirt_coefs, .id = "Question") %>%
left_join(test_versions, by = c("Question" = "label"))
A quick peek at the result:
# Preview the tidied parameter table.
tidy_2pl
## # A tibble: 41 x 12
## Question a_est a_CI_2.5 a_CI_97.5 b_est b_CI_2.5 b_CI_97.5 pre post
## <glue> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 A1_B1 1.11 0.962 1.25 -3.39 -3.74 -3.05 A1 B1
## 2 A2_B0 0.601 0.510 0.691 1.45 1.22 1.67 A2 <NA>
## 3 A3_B2 1.31 1.24 1.39 -0.709 -0.758 -0.660 A3 B2
## 4 A4_B3 1.26 1.18 1.33 -0.585 -0.633 -0.538 A4 B3
## 5 A5_B4 1.04 0.961 1.11 -0.614 -0.683 -0.544 A5 B4
## 6 A6_B5 0.955 0.863 1.05 -2.28 -2.47 -2.09 A6 B5
## 7 A7_B6 1.43 1.34 1.51 -0.842 -0.891 -0.793 A7 B6
## 8 A8_B7 1.01 0.949 1.08 -0.538 -0.597 -0.480 A8 B7
## 9 A9_B8 1.63 1.53 1.74 -0.343 -0.389 -0.296 A9 B8
## 10 A10_B9 1.41 1.32 1.50 -0.689 -0.745 -0.634 A10 B9
## # ... with 31 more rows, and 3 more variables: description <chr>, notes <chr>,
## # outcome <chr>
And a nicely formatted table of the result:
# Formatted table of 2PL estimates and 95% CIs, grouped by item outcome
# (unchanged / removed / added), with colour scales per parameter.
tidy_2pl %>%
select(-pre,-post,-notes) %>%
group_by(outcome) %>%
gt() %>%
fmt_number(columns = contains("a_"), decimals = 2) %>%
fmt_number(columns = contains("b_"), decimals = 2) %>%
data_color(
columns = contains("a_"),
colors = scales::col_numeric(palette = c("Greens"), domain = NULL)
) %>%
data_color(
columns = contains("b_"),
colors = scales::col_numeric(palette = c("Blues"), domain = NULL)
) %>%
tab_spanner(label = "Discrimination", columns = contains("a_")) %>%
tab_spanner(label = "Difficulty", columns = contains("b_")) %>%
# both parameters share the same sub-column headers under their spanners
cols_label(
a_est = "Est.",
b_est = "Est.",
a_CI_2.5 = "2.5%",
b_CI_2.5 = "2.5%",
a_CI_97.5 = "97.5%",
b_CI_97.5 = "97.5%"
)
| Question | Discrimination | Difficulty | description | ||||
|---|---|---|---|---|---|---|---|
| Est. | 2.5% | 97.5% | Est. | 2.5% | 97.5% | ||
| unchanged | |||||||
| A1_B1 | 1.11 | 0.96 | 1.25 | −3.39 | −3.74 | −3.05 | linear function |
| A3_B2 | 1.31 | 1.24 | 1.39 | −0.71 | −0.76 | −0.66 | arithmetic rules |
| A4_B3 | 1.26 | 1.18 | 1.33 | −0.59 | −0.63 | −0.54 | Easy ineq. |
| A5_B4 | 1.04 | 0.96 | 1.11 | −0.61 | −0.68 | −0.54 | logs |
| A6_B5 | 0.96 | 0.86 | 1.05 | −2.28 | −2.47 | −2.09 | logs |
| A7_B6 | 1.43 | 1.34 | 1.51 | −0.84 | −0.89 | −0.79 | graph translation |
| A8_B7 | 1.01 | 0.95 | 1.08 | −0.54 | −0.60 | −0.48 | sine pi/3 |
| A9_B8 | 1.63 | 1.53 | 1.74 | −0.34 | −0.39 | −0.30 | trig.ineq. |
| A10_B9 | 1.41 | 1.32 | 1.50 | −0.69 | −0.74 | −0.63 | trig.identity |
| A11_B10 | 1.30 | 1.22 | 1.39 | −0.74 | −0.80 | −0.68 | period |
| A14_B12 | 1.36 | 1.28 | 1.44 | −0.50 | −0.55 | −0.45 | limit |
| A15_B13 | 0.98 | 0.91 | 1.04 | −0.20 | −0.25 | −0.14 | series |
| A16_B14 | 1.25 | 1.17 | 1.33 | 1.20 | 1.13 | 1.27 | diff.quotient |
| A17_B15 | 1.20 | 1.13 | 1.28 | −0.55 | −0.60 | −0.50 | graph f' |
| A18_B16 | 0.87 | 0.81 | 0.94 | 0.60 | 0.54 | 0.67 | product rule |
| A20_B17 | 1.81 | 1.70 | 1.91 | −0.96 | −1.00 | −0.91 | (exp)' |
| A21_B18 | 1.64 | 1.54 | 1.73 | −0.42 | −0.46 | −0.37 | (ln(sin))' |
| A22_B19 | 1.63 | 1.54 | 1.73 | −0.51 | −0.56 | −0.47 | hyp.functions |
| A23_B20 | 1.43 | 1.34 | 1.51 | −0.02 | −0.06 | 0.02 | slope tangent |
| A24_B21 | 1.23 | 1.15 | 1.31 | −0.71 | −0.77 | −0.66 | IVT |
| A26_B22 | 1.63 | 1.48 | 1.77 | −2.00 | −2.12 | −1.88 | int(poly) |
| A27_B23 | 1.37 | 1.28 | 1.45 | −0.10 | −0.15 | −0.06 | int(exp) |
| A28_B24 | 1.29 | 1.20 | 1.38 | −0.34 | −0.39 | −0.28 | Int = 0 |
| A29_B25 | 0.32 | 0.27 | 0.37 | −1.38 | −1.66 | −1.10 | int even funct |
| A31_B27 | 1.14 | 1.07 | 1.21 | 0.32 | 0.27 | 0.37 | int(abs) |
| A32_B28 | 1.18 | 1.09 | 1.26 | 1.19 | 1.12 | 1.26 | FtoC algebra |
| A33_B29 | 0.93 | 0.86 | 1.00 | −1.62 | −1.73 | −1.51 | difference vectors |
| removed | |||||||
| A2_B0 | 0.60 | 0.51 | 0.69 | 1.45 | 1.22 | 1.67 | 3D |
| A12_B0 | 0.78 | 0.67 | 0.89 | −1.72 | −1.95 | −1.49 | rational exponents |
| A13_B0 | 0.77 | 0.67 | 0.86 | 0.57 | 0.45 | 0.68 | ellipsoid |
| A19_B0 | 1.07 | 0.95 | 1.20 | 0.16 | 0.07 | 0.25 | quotient rule |
| A25_B0 | 0.44 | 0.36 | 0.53 | −1.56 | −1.89 | −1.24 | velocity |
| A30_B0 | 1.16 | 1.03 | 1.30 | −1.45 | −1.60 | −1.31 | FtoC graph |
| A34_B0 | 0.60 | 0.48 | 0.72 | −2.56 | −3.06 | −2.06 | normal vector |
| A35_B0 | 0.98 | 0.85 | 1.12 | −0.48 | −0.61 | −0.35 | intersect planes |
| A36_B0 | 0.93 | 0.80 | 1.05 | 0.65 | 0.53 | 0.77 | parallel planes |
| added | |||||||
| A0_B11 | 0.74 | 0.67 | 0.81 | −0.72 | −0.82 | −0.63 | rational exponents |
| A0_B26 | 0.94 | 0.86 | 1.02 | −0.53 | −0.60 | −0.45 | FtoC graph |
| A0_B30 | 0.52 | 0.44 | 0.59 | −2.41 | −2.78 | −2.05 | normal vector |
| A0_B31 | 0.75 | 0.67 | 0.82 | −0.06 | −0.15 | 0.03 | vector product |
| A0_B32 | 1.23 | 1.11 | 1.34 | 0.15 | 0.07 | 0.22 | scalar product |
# Save the tidied 2PL estimates for use elsewhere.
tidy_2pl %>%
write_csv("data-eth/OUT_2pl-results.csv")
# Difficulty estimates (with 95% CIs) for each question, by question number.
tidy_2pl %>%
  mutate(question_number = parse_number(Question)) %>%
  ggplot(aes(x = question_number, y = b_est, label = Question)) +
  geom_errorbar(aes(ymin = b_CI_2.5, ymax = b_CI_97.5), width = 0.2) +
  geom_text(colour = "grey") +
  geom_point() +
  theme_minimal() +
  labs(x = "Question", y = "Difficulty")
This shows the difficulty and discrimination of each of the questions, highlighting those that were added or removed:
# Scatter discrimination vs difficulty with CI bars in both dimensions.
# Only added/removed questions get a text label (their pre/post item code);
# unchanged items are left unlabelled to reduce clutter.
tidy_2pl %>%
mutate(qnum = parse_number(Question)) %>%
ggplot(aes(
x = a_est,
y = b_est,
label = case_when(
outcome == "unchanged" ~ "",
outcome == "removed" ~ pre,
outcome == "added" ~ post
),
colour = outcome
)) +
# width = 0 draws plain vertical/horizontal CI segments without caps
geom_errorbar(aes(ymin = b_CI_2.5, ymax = b_CI_97.5), width = 0, alpha = 0.5) +
geom_errorbar(aes(xmin = a_CI_2.5, xmax = a_CI_97.5), width = 0, alpha = 0.5) +
geom_text_repel() +
geom_point() +
theme_minimal() +
labs(x = "Discrimination",
y = "Difficulty")
Do students from different programmes of study have different distributions of ability?
Compare the distribution of abilities in the year groups (though in this case there is only one).
# Ridgeline density plot of the estimated abilities (F1), one ridge per
# cohort year, to compare the ability distributions across years.
ggplot(test_scores, aes(F1, y = year, fill = as.factor(year), colour = as.factor(year))) +
geom_density_ridges(alpha=0.5) +
scale_x_continuous(limits = c(-3.5,3.5)) +
labs(title = "Density plot",
subtitle = "Ability grouped by year of taking the test",
x = "Ability", y = "Density",
fill = "Year", colour = "Year") +
theme_minimal()
## Picking joint bandwidth of 0.182
# Test information and standard error across the ability range.
plot(fit_2pl, type = "infoSE", main = "Test information")
# Item information curves, one per item.
plot(fit_2pl, type = "infotrace", main = "Item information curves")
# Expected total score as a function of ability.
plot(fit_2pl, type = "score", auto.key = FALSE)
We can get individual item surface and information plots using the itemplot() function from the mirt package, e.g.
# Trace lines for a single item (item 1).
mirt::itemplot(fit_2pl, item = 1,
main = "Trace lines for item 1")
We can also get the plots for all trace lines, one facet per plot.
# Trace lines for all items, one facet per item.
plot(fit_2pl, type = "trace", auto.key = FALSE)
Or all of them overlaid in one plot.
# Trace lines for all items overlaid in a single panel.
plot(fit_2pl, type = "trace", facet_items=FALSE)
An alternative approach is using ggplot2 and plotly to add interactivity to make it easier to identify items.
# store the object
plt <- plot(fit_2pl, type = "trace", facet_items = FALSE)
# the data we need is in panel.args
# TODO - I had to change the [[1]] to [[2]] since the plt has two panels for some reason, with the one we want being the 2nd panel
panel <- plt$panel.args[[2]]
# Every item's trace line is evaluated over the same theta grid, so the
# number of points per item is total points / number of items (this was
# hard-coded as 200 in the original).
points_per_item <- length(panel$x) / ncol(item_scores)
plt_data <- tibble(
  x = panel$x,
  y = panel$y,
  subscripts = panel$subscripts,
  item = rep(colnames(item_scores), each = points_per_item)
) %>%
  mutate(
    # Fix: str_remove(item, "A") left strings like "1_B1" that as.numeric()
    # silently turned into NA (see the all-NA item_no in the earlier output);
    # parse_number() extracts the leading question number correctly.
    item_no = parse_number(item),
    item = fct_reorder(item, item_no)
  )
head(plt_data)
## # A tibble: 6 x 5
## x y subscripts item item_no
## <dbl> <dbl> <int> <fct> <dbl>
## 1 -6 0.0529 201 A1_B1 NA
## 2 -5.94 0.0563 202 A1_B1 NA
## 3 -5.88 0.0600 203 A1_B1 NA
## 4 -5.82 0.0639 204 A1_B1 NA
## 5 -5.76 0.0680 205 A1_B1 NA
## 6 -5.70 0.0723 206 A1_B1 NA
# Rebuild the trace lines as a ggplot; the `text` aesthetic feeds the plotly
# tooltip so each curve can be identified by hovering.
plt_gg <- ggplot(plt_data, aes(x, y,
colour = item,
text = item)) +
geom_line() +
labs(
title = "2PL - Trace lines",
# plotmath expressions are not supported by ggplotly, hence plain text labels
#x = expression(theta),
x = "theta",
#y = expression(P(theta)),
y = "P(theta)",
colour = "Item"
) +
theme_minimal() +
theme(legend.position = "none")
# Render interactively, showing only the item name in the tooltip.
ggplotly(plt_gg, tooltip = "text")
# Stop knitting here; anything after this point is not rendered.
knitr::knit_exit()